6  Hydro Event Delineation

Purpose: Conduct baseflow separation and delineate hydrologic events to model in Gg framework

6.1 Data

6.1.1 Site info and daily data

Code
# Site metadata: read the EcoDrought site table, promote it to an sf point
# layer using the lon/lat columns (WGS84), and draw an interactive map
# colored by site designation.
siteinfo <- read_csv("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/Data/EcoDrought_SiteInformation.csv")
siteinfo_sp <- siteinfo %>% st_as_sf(coords = c("long", "lat"), crs = 4326)
mapview(siteinfo_sp, zcol = "designation")
Code
# Daily/weekly flow, yield, and temperature series. Two sites are dropped
# from both data sets.
excluded_sites <- c("WoundedBuckCreek", "Brackett Creek")

dat <- read_csv("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/Data/EcoDrought_FlowTempData_DailyWeekly.csv") %>%
  filter(!site_name %in% excluded_sites)

# Raw (sub-daily) flow/temperature series for the same site set.
dat_sub <- read_csv("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/Data/EcoDrought_FlowTempData_Raw.csv") %>%
  filter(!site_name %in% excluded_sites)

# Append water/climate-year variables (water year starts in April).
dat <- add_date_variables(dat, dates = date, water_year_start = 4)
str(dat)
tibble [229,559 × 32] (S3: tbl_df/tbl/data.frame)
 $ station_no          : chr [1:229559] NA NA NA NA ...
 $ site_name           : chr [1:229559] "BigCreekLower" "BigCreekLower" "BigCreekLower" "BigCreekLower" ...
 $ site_id             : chr [1:229559] "BIG_001" "BIG_001" "BIG_001" "BIG_001" ...
 $ basin               : chr [1:229559] "Flathead" "Flathead" "Flathead" "Flathead" ...
 $ subbasin            : chr [1:229559] "Big Creek" "Big Creek" "Big Creek" "Big Creek" ...
 $ region              : chr [1:229559] "Flat" "Flat" "Flat" "Flat" ...
 $ lat                 : num [1:229559] 48.6 48.6 48.6 48.6 48.6 ...
 $ long                : num [1:229559] -114 -114 -114 -114 -114 ...
 $ elev_ft             : num [1:229559] 3429 3429 3429 3429 3429 ...
 $ area_sqmi           : num [1:229559] 81.2 81.2 81.2 81.2 81.2 ...
 $ designation         : chr [1:229559] "little" "little" "little" "little" ...
 $ date                : Date[1:229559], format: "2017-07-28" "2017-07-29" ...
 $ disch_reli          : num [1:229559] 1 1 1 1 1 1 1 1 1 1 ...
 $ temp_reli           : num [1:229559] NA NA NA NA NA NA NA NA NA NA ...
 $ flow_mean           : num [1:229559] 81.4 76.5 77.6 92.1 98.1 ...
 $ tempc_mean          : num [1:229559] NA NA NA NA NA NA NA NA NA NA ...
 $ flow_mean_filled    : num [1:229559] 81.4 76.5 77.6 92.1 98.1 ...
 $ flow_mean_cms       : num [1:229559] 2.3 2.17 2.2 2.61 2.78 ...
 $ flow_mean_filled_cms: num [1:229559] 2.3 2.17 2.2 2.61 2.78 ...
 $ area_sqkm           : num [1:229559] 210 210 210 210 210 ...
 $ Yield_mm            : num [1:229559] 0.946 0.89 0.902 1.071 1.141 ...
 $ Yield_filled_mm     : num [1:229559] 0.946 0.89 0.902 1.071 1.141 ...
 $ flow_mean_7         : num [1:229559] NA NA NA 91.3 94.4 ...
 $ flow_mean_filled_7  : num [1:229559] NA NA NA 91.3 94.4 ...
 $ tempc_mean_7        : num [1:229559] NA NA NA NA NA NA NA NA NA NA ...
 $ Yield_mm_7          : num [1:229559] NA NA NA 1.06 1.1 ...
 $ Yield_filled_mm_7   : num [1:229559] NA NA NA 1.06 1.1 ...
 $ CalendarYear        : num [1:229559] 2017 2017 2017 2017 2017 ...
 $ Month               : num [1:229559] 7 7 7 7 8 8 8 8 8 8 ...
 $ MonthName           : Factor w/ 12 levels "Apr","May","Jun",..: 4 4 4 4 5 5 5 5 5 5 ...
 $ WaterYear           : num [1:229559] 2018 2018 2018 2018 2018 ...
 $ DayofYear           : num [1:229559] 119 120 121 122 123 124 125 126 127 128 ...

6.1.2 Trim to focal site

Code
# Restrict to the Big G gage (West Brook NWIS).
dat_wb <- filter(dat, site_name == "West Brook NWIS")

Raw flow at Big G

Code
# Interactive time series of raw daily yield at Big G.
dat_wb %>%
  select(date, Yield_filled_mm) %>%
  dygraph() %>%
  dyRangeSelector() %>%
  dyAxis("y", label = "Daily yield (mm)") %>%
  dyOptions(fillGraph = TRUE, drawGrid = FALSE, axisLineWidth = 1.5)

Total daily streamflow in yield (mm), over the period of record

6.2 Sensitivity analyses

6.2.1 Missing data

Many of the EcoDrought time series data are incomplete. At some sites, discharge data are available only during the summer and/or fall periods, and at other sites, time series data are interrupted due to malfunctioning sensors and/or ice formation ("ice spikes"). So how does the length of the time series affect baseflow separation (and subsequent event identification)? Wasko and Guo (2022) use a 67 day time series of flow to demonstrate the utility of the hydroEvents package, suggesting digital baseflow separation techniques may be valid for relatively short time series.

Here, I perform a simple sensitivity analysis to explore the effect of time series length on the results of baseflow separation. Essentially, perform baseflow separation on increasingly smaller subsets of the data. With the default parameters, the minimum number of days/observations needed is 31. This is because the default number of points reflected at start and end of data (r) is 30. Reflection allows bf/bfi to be calculated over the entire period of record, as the underlying baseflow separation equations result in issues of "warm-up" and "cool-down" as the recursive filter is moved forward and backward over the dataset (Ladson et al. 2013, Australian Journal of Water Resources). baseflowB() uses a default reflection period of 30, which Ladson et al. (2013) found to "provide a realistic baseflow response for the start and end of the actual flow data".

Code
# Sensitivity of baseflow separation to record length: separate baseflow on
# the full record and on truncated subsets (first ~3 y, 1 y, 6 mo, 3 mo,
# 35 d; 31 is the minimum with the default 30-point reflection).
#
# Helper: run baseflowB() on the first n rows and return date + suffixed
# bf/bfi columns. baseflowB() is called ONCE per subset (the original
# called it twice per subset, doubling the filtering work).
bf_subset <- function(d, n, suffix) {
  d <- d[seq_len(n), ]
  sep <- baseflowB(d$Yield_filled_mm)
  out <- tibble(date = d$date, bf = sep$bf, bfi = sep$bfi)
  names(out) <- c("date", paste0(c("bf_", "bfi_"), suffix))
  out
}

dat_wb_sens <- bf_subset(dat_wb, nrow(dat_wb), "c") %>%
  left_join(bf_subset(dat_wb, 365 * 3, "3y"), by = "date") %>%
  left_join(bf_subset(dat_wb, 365, "1y"), by = "date") %>%
  left_join(bf_subset(dat_wb, 182, "6m"), by = "date") %>%
  left_join(bf_subset(dat_wb, 90, "3m"), by = "date") %>%
  left_join(bf_subset(dat_wb, 35, "1m"), by = "date")

6.2.1.1 Compare baseflow

Divergence in baseflow among datasets is a result of the reflected data of the shorter dataset not matching the actual data of the longer dataset. As a result, divergence really only occurs at the end of each time series and is generally small in magnitude.

Code
# Overlay baseflow series derived from records of different lengths.
dat_wb_sens %>%
  select(date, bf_c, bf_3y, bf_1y, bf_6m, bf_3m, bf_1m) %>%
  dygraph() %>%
  dyRangeSelector() %>%
  dyAxis("y", label = "Daily baseflow in yield (mm)") %>%
  dyOptions(fillGraph = TRUE, drawGrid = FALSE, axisLineWidth = 1.5)

Time series of baseflow derived from datasets of different lengths.

Code
# Pairwise comparison of baseflow across record lengths; red 1:1 lines.
bf_cols <- dat_wb_sens %>% select(bf_c, bf_3y, bf_1y, bf_6m, bf_3m, bf_1m)
ggpairs(bf_cols) + geom_abline(intercept = 0, slope = 1, color = "red")

Pairs plot of baseflow derived from datasets of different lengths. Red lines are 1:1.

6.2.1.2 Compare baseflow index

The story here is essentially the same as above: divergence is ~minimal and restricted to the end of each time series. However, we note that divergence in BFI appears to increase as absolute flow/baseflow decreases, because small differences in absolute space become much larger in relative space when absolute values are small.

Code
# Overlay BFI series derived from records of different lengths. Y-axis
# label corrected: this panel plots the dimensionless baseflow index, not
# baseflow in yield (mm) as the original label claimed.
dat_wb_sens %>%
  select(date, bfi_c, bfi_3y, bfi_1y, bfi_6m, bfi_3m, bfi_1m) %>%
  dygraph() %>%
  dyRangeSelector() %>%
  dyAxis("y", label = "Daily baseflow index") %>%
  dyOptions(fillGraph = TRUE, drawGrid = FALSE, axisLineWidth = 1.5)

Time series of baseflow index derived from datasets of different lengths.

Code
# Pairs plot for the baseflow INDEX comparison. The original selected the
# bf_* columns, duplicating the plot from section 6.2.1.1; this section
# (6.2.1.2) compares the bfi_* columns.
ggpairs(dat_wb_sens %>% select(bfi_c, bfi_3y, bfi_1y, bfi_6m, bfi_3m, bfi_1m)) +
  geom_abline(intercept = 0, slope = 1, color = "red")

Pairs plot of baseflow derived from datasets of different lengths. Red lines are 1:1.

6.2.2 Time scale

Given that streamflow can change so quickly in small, headwater streams, are we missing a key part of the story by using flow data summarized as daily means? Using daily mean flow reduces the range of values, particularly at the upper end (i.e., high flows), and so we may be overlooking the g~G relationship at very high flows.

6.2.2.1 Organize data

First, set baseflow separation (for the Lyne-Hollick one-parameter digital recursive filter) and event delineation parameters (as in Wasko and Guo, 2022)

  • alp: alpha filter parameter, higher values “lower” the estimated baseflow (thus making it difficult to delineate events)
  • numpass: number of passes. Ladson et al. (2013) recommend 3 passes for daily data (default in baseflowB() function) and 9 passes for hourly data
  • thresh: baseflow index threshold for event delineation, higher threshold values make it “easier” to delineate events
Code
# Baseflow-separation and event-delineation parameters (after Wasko & Guo 2022).
alp <- 0.925   # Lyne-Hollick filter alpha; higher values "lower" estimated baseflow
numpass <- 9   # filter passes; Ladson et al. (2013): 3 for daily, 9 for hourly data
thresh <- 0.75 # BFI threshold for event delineation; higher = easier to flag events

Download 15-min NWIS data for big G (West Brook NWIS)

Code
# Pull the full 15-min (unit-value, service = "uv") record for Big G
# (gage 01171100) from NWIS.
wbnwis <- tibble(readNWISdata(sites = "01171100", service = "uv", startDate = "1980-01-01", endDate = Sys.Date(), tz = "America/New_York"))
# NOTE(review): positional indexing assumes columns 2-4 are site number,
# datetime, and discharge; readNWISdata's column layout is not guaranteed --
# consider selecting by name. TODO confirm.
wbnwis2 <- wbnwis[,c(2,3,4)]
names(wbnwis2) <- c("station_no", "datetime", "flow")
# Attach site metadata (drainage area, etc.) for the Big G gage.
wbnwis2 <- wbnwis2 %>% left_join(siteinfo %>% filter(site_name == "West Brook NWIS"))
# Print the NWIS site metadata stored on the download.
attributes(wbnwis)$siteInfo
                 station_nm  site_no agency_cd timeZoneOffset
1 WEST BROOK NR WHATELY, MA 01171100      USGS         -05:00
  timeZoneAbbreviation dec_lat_va dec_lon_va       srs siteTypeCd hucCd stateCd
1                  EST   42.41437  -72.62876 EPSG:4326         ST            25
  countyCd network
1    25011    NWIS

Calculate yield

Code
# Convert discharge (cfs -> cms) and basin area (mi^2 -> km^2), then compute
# yield in mm: cms * 3600 s * (1 / km^2) * (km^2 / 1e6 m^2) * 1000 mm/m.
# NOTE(review): the 3600 s factor implies an hourly timestep, but wbnwis2 is
# a 15-min record at this point -- verify intended units before aggregating
# or summing these yields.
wbnwis2 <- wbnwis2 %>% mutate(flow_mean_cms = flow*0.02831683199881, area_sqkm = area_sqmi*2.58999) %>%
  mutate(Yield_mm = flow_mean_cms * 3600 * (1/unique(area_sqkm)) * (1/1000000) * 1000)

6.2.2.2 15 min data BF

Code
# --- Baseflow separation and event delineation on the 15-min record ---

# baseflowB() is called ONCE and both components reused (the original called
# it twice). passes = 15 is hard-coded here rather than numpass (= 9);
# presumably intentional for 15-min data -- TODO confirm.
wbnwis2b <- wbnwis2 %>% filter(!is.na(Yield_mm))
bfsep <- baseflowB(wbnwis2b$Yield_mm, alpha = alp, passes = 15)
wbnwis2b <- wbnwis2b %>% mutate(bf = bfsep$bf, bfi = bfsep$bfi)

dat_big <- wbnwis2b
events <- eventBaseflow(dat_big$Yield_mm, BFI_Th = thresh, bfi = dat_big$bfi)
events <- events %>% mutate(len = end - srt + 1)

# define positions of non-events (gaps between successive events); vectorized
# and correct even when only one event exists (the original `for (i in 2:n)`
# loop misbehaves when n == 1 because 2:1 counts down)
n_events <- nrow(events)
srt <- c(1, if (n_events > 1) events$end[-n_events] + 1)
end <- events$srt - 1
nonevents <- data.frame(
  tibble(srt, end) %>%
    mutate(len = end - srt) %>%
    filter(len >= 0) %>%
    select(-len) %>%
    add_row(srt = events$end[n_events] + 1, end = nrow(dat_big))
)

# binary event/non-event flag (1 = event, 2 = non-event) and event IDs
isevent_vec <- rep(2, times = nrow(dat_big))
eventid_vec <- rep(NA, times = nrow(dat_big))
for (i in seq_len(n_events)) {
  idx <- events$srt[i]:events$end[i]
  isevent_vec[idx] <- 1
  eventid_vec[idx] <- i
}

# non-event IDs
noneventid_vec <- rep(NA, times = nrow(dat_big))
for (i in seq_len(nrow(nonevents))) {
  noneventid_vec[nonevents$srt[i]:nonevents$end[i]] <- i
}

# "agnostic events": hydro events and non-events interleaved in time order
agnevents <- rbind(events %>% select(srt, end) %>% mutate(event = 1),
                   nonevents %>% mutate(event = 0)) %>% arrange(srt)
agneventid_vec <- rep(NA, times = nrow(dat_big))  # preallocated (original grew a vector)
for (i in seq_len(nrow(agnevents))) {
  agneventid_vec[agnevents$srt[i]:agnevents$end[i]] <- i
}

# add event/non-event vectors to Big G data.
# NOTE: the original rename() mapped Yield_mm to BOTH big_yield and big_flow
# in one call, which is not a valid rename; keep big_flow (the name used
# downstream) via rename and carry big_yield as an explicit copy.
dat_big <- dat_big %>%
  mutate(isevent = isevent_vec,
         eventid = eventid_vec,
         noneventid = noneventid_vec,
         agneventid = agneventid_vec,
         big_event_yield = ifelse(isevent_vec == 1, Yield_mm, NA),
         big_nonevent_yield = ifelse(isevent_vec == 2, Yield_mm, NA),
         big_event_quick = big_event_yield - bf,
         big_yield = Yield_mm) %>%
  rename(big_flow = Yield_mm, big_bf = bf, big_bfi = bfi)

dat_big %>% select(datetime, big_flow, big_bf, big_event_yield, big_nonevent_yield) %>% dygraph() %>% dyRangeSelector() %>% dyAxis("y", label = "Hourly yield (mm)") %>% dyOptions(fillGraph = TRUE, drawGrid = FALSE, axisLineWidth = 1.5)

6.2.2.3 Baseflow separation

Summarize at hourly time-step and perform baseflow separation.

Code
# Aggregate the 15-min record to hourly means, then separate baseflow.
# baseflowB() is called ONCE and both outputs reused (the original called it
# twice, doubling the filtering work).
wbnwis3 <- wbnwis2 %>%
  filter(!is.na(Yield_mm)) %>%
  mutate(datetime = floor_date(datetime, unit = "hour")) %>%
  group_by(datetime) %>% summarise(Yield_mm = mean(Yield_mm)) %>%
  ungroup()
bfsep_h <- baseflowB(wbnwis3$Yield_mm, alpha = alp, passes = numpass)
wbnwis3 <- wbnwis3 %>% mutate(bf = bfsep_h$bf, bfi = bfsep_h$bfi)

wbnwis3 %>% select(datetime, Yield_mm, bf) %>% dygraph() %>%
  dyRangeSelector() %>% dyAxis("y", label = "Hourly yield (mm)") %>%
  dyOptions(fillGraph = TRUE, drawGrid = FALSE, axisLineWidth = 1.5)

6.2.2.4 Event delineation

Delineate events

Code
# Delineate events on the hourly record and record each event's length
# (number of hourly steps).
dat_big <- wbnwis3
events <- eventBaseflow(dat_big$Yield_mm, BFI_Th = thresh, bfi = dat_big$bfi)
events <- mutate(events, len = end - srt + 1)
head(events)
  srt end which.max        max       sum len
1 169 197       177 0.15546303 3.1827600  29
2 217 222       220 0.08265451 0.4756305   6
3 313 337       321 0.10321018 2.2936842  25
4 442 477       446 0.08334546 2.5569349  36
5 479 483       482 0.06149426 0.2841519   5
6 503 514       508 0.08930487 0.7877657  12

Wrangle delineated event data

Code
# define positions of non-events (gaps between successive events); vectorized
# and correct even with a single event (the original `for (i in 2:n)` loop
# misbehaves when n == 1)
n_events <- nrow(events)
srt <- c(1, if (n_events > 1) events$end[-n_events] + 1)
end <- events$srt - 1
nonevents <- data.frame(
  tibble(srt, end) %>%
    mutate(len = end - srt) %>%
    filter(len >= 0) %>%
    select(-len) %>%
    add_row(srt = events$end[n_events] + 1, end = nrow(dat_big))
)

# binary event/non-event flag (1 = event, 2 = non-event) and event IDs
isevent_vec <- rep(2, times = nrow(dat_big))
eventid_vec <- rep(NA, times = nrow(dat_big))
for (i in seq_len(n_events)) {
  idx <- events$srt[i]:events$end[i]
  isevent_vec[idx] <- 1
  eventid_vec[idx] <- i
}

# non-event IDs
noneventid_vec <- rep(NA, times = nrow(dat_big))
for (i in seq_len(nrow(nonevents))) {
  noneventid_vec[nonevents$srt[i]:nonevents$end[i]] <- i
}

# "agnostic events": hydro events and non-events interleaved in time order
agnevents <- rbind(events %>% select(srt, end) %>% mutate(event = 1),
                   nonevents %>% mutate(event = 0)) %>% arrange(srt)
agneventid_vec <- rep(NA, times = nrow(dat_big))  # preallocated (original grew a vector)
for (i in seq_len(nrow(agnevents))) {
  agneventid_vec[agnevents$srt[i]:agnevents$end[i]] <- i
}

# add event/non-event vectors to Big G data.
# NOTE: the original rename() mapped Yield_mm to BOTH big_yield and big_flow,
# which is not a valid rename; keep big_flow (used downstream) via rename and
# carry big_yield as an explicit copy.
dat_big <- dat_big %>%
  mutate(isevent = isevent_vec,
         eventid = eventid_vec,
         noneventid = noneventid_vec,
         agneventid = agneventid_vec,
         big_event_yield = ifelse(isevent_vec == 1, Yield_mm, NA),
         big_nonevent_yield = ifelse(isevent_vec == 2, Yield_mm, NA),
         big_event_quick = big_event_yield - bf,
         big_yield = Yield_mm) %>%
  rename(big_flow = Yield_mm, big_bf = bf, big_bfi = bfi)
(dat_big)
# A tibble: 42,233 × 11
   datetime            big_flow  big_bf big_bfi isevent eventid noneventid
   <dttm>                 <dbl>   <dbl>   <dbl>   <dbl>   <int>      <int>
 1 2020-01-31 10:00:00   0.0801 0.00576  0.0719       2      NA          1
 2 2020-01-31 11:00:00   0.0926 0.00639  0.0690       2      NA          1
 3 2020-01-31 12:00:00   0.0805 0.00705  0.0876       2      NA          1
 4 2020-01-31 13:00:00   0.0652 0.00775  0.119        2      NA          1
 5 2020-01-31 14:00:00   0.0647 0.00847  0.131        2      NA          1
 6 2020-01-31 15:00:00   0.0658 0.00923  0.140        2      NA          1
 7 2020-01-31 16:00:00   0.0652 0.0100   0.154        2      NA          1
 8 2020-01-31 17:00:00   0.0658 0.0108   0.165        2      NA          1
 9 2020-01-31 18:00:00   0.0669 0.0117   0.174        2      NA          1
10 2020-01-31 19:00:00   0.0658 0.0125   0.191        2      NA          1
# ℹ 42,223 more rows
# ℹ 4 more variables: agneventid <int>, big_event_yield <dbl>,
#   big_nonevent_yield <dbl>, big_event_quick <dbl>

View time series output…a few things to note

  • Many delineated events are <24 hours in length
  • Much of the natural diel variation in streamflow (“stream breathing” due to diel cycle of ET) ends up being delineated as individual events
    • When viewed at this time-scale, there also seems to be variation in terms of whether or not the “stream breathing” exists, which could be due to changes in how the data were processed and subsequently smoothed (or not)
  • WMA interpolation becomes an issue at the sub-daily time scale. I.e., the time scale of the data changes during observed (15-min) and interpolated (4-hour) periods
Code
# Hourly Big G series with event/non-event yield overlays.
dat_big %>%
  select(datetime, big_flow, big_bf, big_event_yield, big_nonevent_yield) %>%
  dygraph() %>%
  dyRangeSelector() %>%
  dyAxis("y", label = "Hourly yield (mm)") %>%
  dyOptions(fillGraph = TRUE, drawGrid = FALSE, axisLineWidth = 1.5)

6.2.2.5 Gg pseudo-analysis

Join big and little data

Code
# Hourly little-g yields for the West Brook network. Yield is computed
# directly from the area_sqkm column in one vectorized mutate; the original
# per-site loop + do.call(rbind) was unnecessary (and slower).
dat_little <- dat_sub %>%
  filter(site_name %in% c("West Brook Lower", "Mitchell Brook", "Jimmy Brook",
                          "Obear Brook Lower", "West Brook Upper",
                          "West Brook Reservoir", "Sanderson Brook",
                          "Avery Brook", "West Whately Brook")) %>%
  mutate(datetime = floor_date(datetime, unit = "hour")) %>%
  group_by(site_name, area_sqmi, datetime) %>%
  summarise(flow = mean(flow)) %>%
  ungroup() %>%
  mutate(flow_mean_cms = flow * 0.02831683199881,
         area_sqkm = area_sqmi * 2.58999,
         # mm of runoff: cms * 3600 s * (1/km^2) * (km^2/1e6 m^2) * 1000 mm/m
         Yield_mm = flow_mean_cms * 3600 * (1 / area_sqkm) * (1 / 1000000) * 1000)

# sites and site-specific basin areas (kept for downstream chunks)
sites <- unique(dat_little$site_name)
basinarea <- dat_little %>% group_by(site_name) %>% summarize(area_sqkm = unique(area_sqkm))

# wide: one row per (little-g site, Big G agnostic event), with cumulative
# and mean (log) yields for both little g and Big G over each event/non-event
# period, plus z-scored Big G covariates for modeling.
dat_wb2 <- dat_little %>% 
  # clip the little-g record to the Big G period, then attach Big G
  # event/non-event columns by datetime
  filter(datetime >= min(dat_big$datetime) & datetime <= max(dat_big$datetime)) %>%
  left_join(dat_big) %>%
  group_by(site_name, agneventid) %>% 
  summarise(eventlen = n(),
            mindate = min(datetime),
            isevent = unique(isevent), 
            # +0.01 offset presumably guards against log(0) -- TODO confirm
            yield_little_cumul = sum(Yield_mm+0.01),
            yield_big_cumul = sum(big_flow+0.01),
            yield_little_cumul_log = log(yield_little_cumul),
            yield_big_cumul_log = log(yield_big_cumul),
            yield_little_mean_log = mean(log(Yield_mm+0.01)),
            yield_big_mean_log = mean(log(big_flow+0.01))) %>%
  ungroup() %>%
  # drop periods with no Big G event classification
  filter(!is.na(isevent)) %>%
  mutate(site_name = factor(site_name, levels = c("West Brook Lower", "Mitchell Brook", "Jimmy Brook", "Obear Brook Lower", "West Brook Upper", "West Brook Reservoir", "Sanderson Brook", "Avery Brook", "West Whately Brook")),
         site_name_cd = as.numeric(site_name),
         # standardized Big G covariates
         z_yield_big_cumul_log = as.numeric(scale(yield_big_cumul_log, center = TRUE, scale = TRUE)),
         z_yield_big_mean_log = as.numeric(scale(yield_big_mean_log, center = TRUE, scale = TRUE)))
Alignment

At the hourly time-scale, how aligned are the time series data? Typically, Big G lags behind Little G by 2-4 hours, but this depends on the site.

Code
# Overlay all little-g hourly yield series with Big G (black) to inspect
# temporal alignment.
dat_little %>%
  select(datetime, site_name, Yield_mm) %>%
  spread(key = site_name, value = Yield_mm) %>%
  left_join(dat_big %>% select(datetime, big_flow)) %>%
  dygraph() %>%
  dyRangeSelector() %>%
  dyAxis("y", label = "Yield (mm)") %>%
  dyOptions(colors = c(hcl.colors(9, "Zissou 1"), "black"))
Cumulative yield

Gg relationship with data summarized as cumulative yield per event/non-event

Code
# Cumulative (log) yield, little g vs Big G, faceted by event/baseflow.
plot_dat <- dat_wb2 %>%
  mutate(isevent = dplyr::recode(isevent, "1" = "Event", "2" = "Baseflow"))
ggplot(plot_dat,
       aes(x = yield_big_cumul_log, y = yield_little_cumul_log,
           group = site_name, color = site_name)) +
  geom_point(alpha = 0.25) +
  geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
  geom_smooth(method = "lm", se = F) +
  facet_wrap(~isevent)

Facet by site

Code
# Same cumulative-yield relationship, faceted by site (legend suppressed).
plot_dat <- dat_wb2 %>%
  mutate(isevent = dplyr::recode(isevent, "1" = "Event", "2" = "Baseflow"))
ggplot(plot_dat,
       aes(x = yield_big_cumul_log, y = yield_little_cumul_log,
           group = site_name, color = site_name)) +
  geom_point(alpha = 0.25) +
  geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
  geom_smooth(method = "lm", se = F) +
  facet_wrap(~site_name) +
  theme(legend.position = "none")

Mean yield

Gg relationship with data summarized as mean yield per event/non-event

Code
# Mean (log) yield, little g vs Big G, faceted by event/baseflow.
plot_dat <- dat_wb2 %>%
  mutate(isevent = dplyr::recode(isevent, "1" = "Event", "2" = "Baseflow"))
ggplot(plot_dat,
       aes(x = yield_big_mean_log, y = yield_little_mean_log,
           group = site_name, color = site_name)) +
  geom_point(alpha = 0.25) +
  geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
  geom_smooth(method = "lm", se = F) +
  facet_wrap(~isevent)

Effect of mean (log) yield at Big G on mean (log) yield at little g during baseflow and event periods.

Facet by site

Code
# Same mean-yield relationship, faceted by site (legend suppressed).
plot_dat <- dat_wb2 %>%
  mutate(isevent = dplyr::recode(isevent, "1" = "Event", "2" = "Baseflow"))
ggplot(plot_dat,
       aes(x = yield_big_mean_log, y = yield_little_mean_log,
           group = site_name, color = site_name)) +
  geom_point(alpha = 0.25) +
  geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
  geom_smooth(method = "lm", se = F) +
  facet_wrap(~site_name) +
  theme(legend.position = "none")

6.2.2.6 Summary

  • Event delineation at sub-daily (i.e., hourly) time scale is strongly affected by data processing steps (e.g., smoothing and interpolation by WMA) and landscape processes that are not the focus of this study (i.e., diel fluctuations in flow due to ET)
    • In some cases, changes in how WMA treats diel fluctuations may introduce further inconsistencies into downstream analyses
  • Even in small basins such as the West Brook, temporal mismatches between big and little g time series data due to routing time are evident (2-4 hours)
    • Because many of the event/non-event periods are very short, these mismatches may have a large effect on how reasonable it is to apply Big G events to little g data.
  • Inferences regarding the g~G relationship derived from hourly data generally do not match those derived from daily data.

6.3 Baseflow separation

Perform baseflow separation. See Wasko and Guo (2022, Hydrological Processes) for recommendations on parameterization, as different algorithms and alpha values can produce different results. Section 4.1: “…users should not simply use recommended filter parameter values from literature in combination with any baseflow filter code without verification of their choice of filter parameter. As the digital baseflow filter is not tied to any physical realism (Nathan and McMahon, 1990) and a larger fractional baseflow may aid identification of events—even if this is not strictly baseflow as per its definition (Linsley et al., 1958)…”.

It is not actually necessary to do this separately, as baseflow separation is conducted in the event delineation function internally. But it is helpful to view the results and explore how different parameterizations yield different baseflow contributions.

6.3.1 West Brook

Code
# Daily baseflow separation for the West Brook network (Big G + little g).
dat_wb <- dat %>% filter(site_name %in% c("West Brook NWIS", "West Brook Lower", "Mitchell Brook", "Jimmy Brook", "Obear Brook Lower", "West Brook Upper", "West Brook Reservoir", "Sanderson Brook", "Avery Brook", "West Whately Brook"))
dat_wb_bf <- dat_wb %>% 
  filter(!is.na(Yield_filled_mm)) %>% 
  select(site_name, basin, subbasin, WaterYear, date, Yield_filled_mm, flow_mean_filled_cms, area_sqkm) %>%
  group_by(site_name) %>%
  # group_modify() lets baseflowB() run ONCE per site (the original mutate
  # called it twice per site, doubling the filtering work)
  group_modify(~ {
    sep <- baseflowB(.x$Yield_filled_mm)
    mutate(.x, bf = sep$bf, bfi = sep$bfi)
  }) %>%
  ungroup()
head(dat_wb_bf)
# A tibble: 6 × 10
  site_name   basin      subbasin   WaterYear date       Yield_filled_mm
  <chr>       <chr>      <chr>          <dbl> <date>               <dbl>
1 Avery Brook West Brook West Brook      2020 2020-02-20            1.43
2 Avery Brook West Brook West Brook      2020 2020-02-21            1.42
3 Avery Brook West Brook West Brook      2020 2020-02-22            1.22
4 Avery Brook West Brook West Brook      2020 2020-02-23            1.28
5 Avery Brook West Brook West Brook      2020 2020-02-24            1.40
6 Avery Brook West Brook West Brook      2020 2020-02-25            1.76
# ℹ 4 more variables: flow_mean_filled_cms <dbl>, area_sqkm <dbl>, bf <dbl>,
#   bfi <dbl>
Code
# Big G daily yield, baseflow, and BFI over the period of record.
dat_wb_bf %>%
  filter(site_name == "West Brook NWIS") %>%
  select(date, Yield_filled_mm, bf, bfi) %>%
  dygraph() %>%
  dyRangeSelector() %>%
  dyAxis("y", label = "Daily yield (mm)") %>%
  dyOptions(fillGraph = TRUE, drawGrid = FALSE, axisLineWidth = 1.5)

Total daily streamflow in yield (mm), baseflow in yield, and fractional baseflow index over the period of record for site West Brook NWIS

6.3.2 Spread Creek

Perform baseflow separation for spread creek, an example of a snowmelt dominated stream. Note that with the default parameters, the ~entire summer/fall period could potentially be classified as a single event

Code
# Baseflow separation for Spread Creek (snowmelt-dominated example).
alp <- 0.925
dat_sh <- dat %>% 
  filter(site_name %in% c("SF Spread Creek Lower NWIS")) %>% 
  filter(!is.na(Yield_filled_mm)) %>% 
  select(site_name, basin, subbasin, WaterYear, date, Yield_filled_mm)
# Single site, so no grouping needed; baseflowB() is called ONCE and both
# outputs reused (the original called it twice inside a grouped mutate).
bfsep_sh <- baseflowB(dat_sh$Yield_filled_mm, alpha = alp)
dat_sh <- dat_sh %>% mutate(bf = bfsep_sh$bf, bfi = bfsep_sh$bfi)
dat_sh %>% filter(site_name == "SF Spread Creek Lower NWIS") %>% select(date, Yield_filled_mm, bf, bfi) %>% dygraph() %>% dyRangeSelector() %>% dyAxis("y", label = "Daily yield (mm)") %>% dyOptions(fillGraph = TRUE, drawGrid = FALSE, axisLineWidth = 1.5)

Total daily streamflow in yield (mm), baseflow in yield, and fractional baseflow index over the period of record for site SF Spread Creek Lower NWIS

6.4 Event identification

There are multiple options for methods of event identification. Section 4.2 in Wasko and Guo (2022): "It can be generalized that, if the aim of the analysis is to identify independent streamflow maxima then eventMaxima() and eventMinima() work well and indeed have been traditionally employed for this task. If identifying independent small events becomes difficult, or the aim is to identify wet spells, eventBaseflow() may be preferred." In our case, small events are likely important, so we use eventBaseflow() with a relatively large value for BFI_Th to capture those small events.

The aim is to understand how water availability affects differences in yield between Big G and Little g during hydrologic events and the intervening periods (non-event baseflow periods). Therefore, we identify events from the Big G data, apply event/non-event time periods to the little g data, then explore G-g deviations during both events and non-events as a function of water availability.

Separate Big G and Little G data

Code
# Split the network data: Big G is the NWIS gage, little g is everything else.
dat_big <- filter(dat_wb_bf, site_name == "West Brook NWIS")
dat_little <- filter(dat_wb_bf, site_name != "West Brook NWIS")

Identify events at Big G:

Code
# Delineate Big G events from daily yield and record each event's length (days).
events <- eventBaseflow(dat_big$Yield_filled_mm, BFI_Th = 0.75)
events <- mutate(events, len = end - srt + 1)
head(events)
  srt end which.max      max       sum len
1   6   9         7 2.528865  7.652927   4
2  12  15        13 2.006509  6.881830   4
3  26  31        27 6.351183 19.045257   6
4  31  35        33 3.026347 11.881521   5
5  41  44        42 3.125843 10.115461   4
6  47  50        48 3.125843 10.156918   4

Plot Big G events using the default function

Code
# Quick visual check of the delineated events using hydroEvents' built-in plot.
plotEvents(dat_big$Yield_filled_mm, events = events)

Time series of hydrologic events at Big G, identified using eventBaseflow().

Now add variables to the Big G time series data specifying events and non-events

Code
# define positions of non-events (gaps between successive events); vectorized
# and correct even with a single event (the original `for (i in 2:n)` loop
# misbehaves when n == 1 because 2:1 counts down)
n_events <- nrow(events)
srt <- c(1, if (n_events > 1) events$end[-n_events] + 1)
end <- events$srt - 1
nonevents <- data.frame(
  tibble(srt, end) %>%
    mutate(len = end - srt) %>%
    filter(len >= 0) %>%
    select(-len) %>%
    add_row(srt = events$end[n_events] + 1, end = nrow(dat_big))
)

# binary event/non-event flag (1 = event, 2 = non-event) and event IDs
isevent_vec <- rep(2, times = nrow(dat_big))
eventid_vec <- rep(NA, times = nrow(dat_big))
for (i in seq_len(n_events)) {
  idx <- events$srt[i]:events$end[i]
  isevent_vec[idx] <- 1
  eventid_vec[idx] <- i
}

# non-event IDs
noneventid_vec <- rep(NA, times = nrow(dat_big))
for (i in seq_len(nrow(nonevents))) {
  noneventid_vec[nonevents$srt[i]:nonevents$end[i]] <- i
}

# "agnostic events": hydro events and non-events interleaved in time order
agnevents <- rbind(events %>% select(srt, end) %>% mutate(event = 1),
                   nonevents %>% mutate(event = 0)) %>% arrange(srt)
agneventid_vec <- rep(NA, times = nrow(dat_big))  # preallocated (original grew a vector)
for (i in seq_len(nrow(agnevents))) {
  agneventid_vec[agnevents$srt[i]:agnevents$end[i]] <- i
}

# add event/non-event vectors to Big G data
dat_big <- dat_big %>% 
  mutate(isevent = isevent_vec, 
         eventid = eventid_vec,
         noneventid = noneventid_vec,
         agneventid = agneventid_vec,
         big_event_yield = ifelse(isevent_vec == 1, Yield_filled_mm, NA),
         big_nonevent_yield = ifelse(isevent_vec == 2, Yield_filled_mm, NA),
         big_event_quick = big_event_yield - bf) %>%
  rename(big_yield = Yield_filled_mm, big_bf = bf, big_bfi = bfi, big_flow = flow_mean_filled_cms, big_area_sqkm = area_sqkm)
(dat_big)
# A tibble: 1,780 × 17
   site_name       basin      subbasin   WaterYear date       big_yield big_flow
   <chr>           <chr>      <chr>          <dbl> <date>         <dbl>    <dbl>
 1 West Brook NWIS West Brook West Brook      2020 2020-02-01      1.57    0.535
 2 West Brook NWIS West Brook West Brook      2020 2020-02-02      1.52    0.518
 3 West Brook NWIS West Brook West Brook      2020 2020-02-03      1.45    0.496
 4 West Brook NWIS West Brook West Brook      2020 2020-02-04      1.41    0.481
 5 West Brook NWIS West Brook West Brook      2020 2020-02-05      1.42    0.484
 6 West Brook NWIS West Brook West Brook      2020 2020-02-06      1.51    0.515
 7 West Brook NWIS West Brook West Brook      2020 2020-02-07      2.53    0.864
 8 West Brook NWIS West Brook West Brook      2020 2020-02-08      2.04    0.697
 9 West Brook NWIS West Brook West Brook      2020 2020-02-09      1.58    0.538
10 West Brook NWIS West Brook West Brook      2020 2020-02-10      1.47    0.501
# ℹ 1,770 more rows
# ℹ 10 more variables: big_area_sqkm <dbl>, big_bf <dbl>, big_bfi <dbl>,
#   isevent <dbl>, eventid <int>, noneventid <int>, agneventid <int>,
#   big_event_yield <dbl>, big_nonevent_yield <dbl>, big_event_quick <dbl>
Code
# Daily Big G series with event/non-event yield overlays.
dat_big %>%
  select(date, big_yield, big_bf, big_event_yield, big_nonevent_yield) %>%
  dygraph() %>%
  dyRangeSelector() %>%
  dyAxis("y", label = "Daily yield (mm)") %>%
  dyOptions(fillGraph = TRUE, drawGrid = FALSE, axisLineWidth = 1.5)

Time series of hydrologic events at Big G, identified using eventBaseflow().

Applying Big G event/non-event periods to little g time series data inherently assumes that event/non-event periods would be similarly delineated for little g. If this assumption does not hold, then non-event little g flow would be included in event periods, and vice-versa. How well does this assumption hold?

How aligned are the daily time series data? (At the hourly time-scale, Big G typically lags behind Little g by 2-4 hours, depending on the site.)

Code
# Overlay all little-g daily yield series with Big G (black) to inspect
# alignment of the daily records.
dat_little %>%
  select(date, site_name, Yield_filled_mm) %>%
  spread(key = site_name, value = Yield_filled_mm) %>%
  left_join(dat_big %>% select(date, big_flow)) %>%
  dygraph() %>%
  dyRangeSelector() %>%
  dyAxis("y", label = "Yield (mm)") %>%
  dyOptions(colors = c(hcl.colors(9, "Zissou 1"), "black"))

How does event delineation change?

Code
# Little-g site list. NOTE(review): `sites` is not used below -- Jimmy Brook
# is hard-coded; presumably a loop over sites was intended eventually.
sites <- c("West Brook Lower", "Mitchell Brook", "Jimmy Brook", "Obear Brook Lower", "West Brook Upper", "West Brook Reservoir", "Sanderson Brook", "Avery Brook", "West Whately Brook")

# Delineate events independently for one little-g site (Jimmy Brook).
dat_little2 <- dat_little %>% filter(site_name == "Jimmy Brook")
events_little <- eventBaseflow(dat_little2$Yield_filled_mm, BFI_Th = 0.75)

# define positions of non-events; vectorized and correct even with a single
# event (the original `for (i in 2:n)` loop misbehaves when n == 1)
n_ev <- nrow(events_little)
srt <- c(1, if (n_ev > 1) events_little$end[-n_ev] + 1)
end <- events_little$srt - 1
nonevents <- data.frame(
  tibble(srt, end) %>%
    mutate(len = end - srt) %>%
    filter(len >= 0) %>%
    select(-len) %>%
    add_row(srt = events_little$end[n_ev] + 1, end = nrow(dat_little2))
)

# binary event/non-event flag (1 = event, 2 = non-event) and event IDs
isevent_vec <- rep(2, times = nrow(dat_little2))
eventid_vec <- rep(NA, times = nrow(dat_little2))
for (i in seq_len(n_ev)) {
  idx <- events_little$srt[i]:events_little$end[i]
  isevent_vec[idx] <- 1
  eventid_vec[idx] <- i
}

# non-event IDs
noneventid_vec <- rep(NA, times = nrow(dat_little2))
for (i in seq_len(nrow(nonevents))) {
  noneventid_vec[nonevents$srt[i]:nonevents$end[i]] <- i
}

# "agnostic events": events and non-events interleaved in time order
agnevents <- rbind(events_little %>% select(srt, end) %>% mutate(event = 1),
                   nonevents %>% mutate(event = 0)) %>% arrange(srt)
agneventid_vec <- rep(NA, times = nrow(dat_little2))  # preallocated (original grew a vector)
for (i in seq_len(nrow(agnevents))) {
  agneventid_vec[agnevents$srt[i]:agnevents$end[i]] <- i
}

# add event/non-event vectors to the little g data
dat_little2 <- dat_little2 %>% 
  mutate(isevent = isevent_vec, 
         eventid = eventid_vec,
         noneventid = noneventid_vec,
         agneventid = agneventid_vec,
         little_event_yield = ifelse(isevent_vec == 1, Yield_filled_mm, NA),
         little_nonevent_yield = ifelse(isevent_vec == 2, Yield_filled_mm, NA),
         little_event_quick = little_event_yield - bf) %>%
  rename(little_yield = Yield_filled_mm, little_bf = bf, little_bfi = bfi)


# Compare event yields as delineated for Big G vs for the little g site.
dat_big %>%
  select(date, big_event_yield) %>%
  left_join(dat_little2 %>% select(date, little_event_yield)) %>%
  dygraph() %>%
  dyRangeSelector() %>%
  dyAxis("y", label = "Daily yield (mm)") %>%
  dyOptions(fillGraph = TRUE, drawGrid = FALSE, axisLineWidth = 1.5)

Time series of yield for Big G (West Brook NWIS) and one little g site (Jimmy Brook) during hydrologic events as delineated for Big G and little g, respectively.

Whether or not events align between G and g is highly variable. In some cases, g events begin/end prior to G events, and in other cases g events begin/end later than G events. In some cases g events are shorter than G events, and in other cases they are longer. In many cases, events are perfectly matched. Importantly, peaks in yield are almost always synchronous.

Ultimately, does this matter given that we are simply using this as a method to break up our data? Furthermore, the framing of the ~entire project is that Big G is the reference by which to compare all little g’s. In this sense, applying event/non-event periods derived from G to g matches this perspective.

6.5 Join events to Little g

Code
# wide
# Join the Big G daily series (with its event delineation) onto every
# little g record, then collapse to one row per site x agnostic event
# period (agneventid covers both events and non-events).
dat_wb2 <- dat_little %>% 
  # keep only little g dates inside the Big G period of record
  filter(date >= min(dat_big$date) & date <= max(dat_big$date)) %>%
  left_join(dat_big %>% select(-site_name)) %>% 
  group_by(site_name, basin, subbasin, agneventid) %>% 
  # NOTE: +0.01 offsets guard the log() transforms against zero-yield days
  summarise(eventlen = n(),
            mindate = min(date),
            isevent = unique(isevent), 
            yield_little_cum = sum(Yield_filled_mm+0.01),
            yield_big_cum = sum(big_yield+0.01),
            yield_little_cum_log = log(yield_little_cum),
            yield_big_cum_log = log(yield_big_cum),
            # xxx/yyy: cumulative yield rebuilt by hand from flow (cms ->
            # mm over the drainage area) to cross-check the fasstr-derived
            # yield; compared in the commented-out plots below
            xxx_little = sum(flow_mean_filled_cms * 86400 * (1/unique(area_sqkm)) * (1/1000000) * 1000),
            yyy_little = sum(flow_mean_filled_cms * 86400) * (1/unique(area_sqkm)) * (1/1000000) * 1000,
            yield_little_mean_log = mean(log(Yield_filled_mm+0.01)),
            yield_big_mean_log = mean(log(big_yield+0.01)),
            # within-period quantiles of daily log yield for both scales
            yield_little_q25_log = quantile(log(Yield_filled_mm+0.01), probs = 0.25),
            yield_little_q50_log = quantile(log(Yield_filled_mm+0.01), probs = 0.50),
            yield_little_q75_log = quantile(log(Yield_filled_mm+0.01), probs = 0.75),
            yield_big_q25_log = quantile(log(big_yield+0.01), probs = 0.25),
            yield_big_q50_log = quantile(log(big_yield+0.01), probs = 0.50),
            yield_big_q75_log = quantile(log(big_yield+0.01), probs = 0.75)) %>%
  ungroup() %>%
  # fix plotting/factor order of sites (presumably spatial order within
  # the West Brook basin -- TODO confirm) and z-score the Big G
  # predictors for modeling
  mutate(site_name = factor(site_name, levels = c("West Brook Lower", "Mitchell Brook", "Jimmy Brook", "Obear Brook Lower", "West Brook Upper", "West Brook Reservoir", "Sanderson Brook", "Avery Brook", "West Whately Brook")),
         site_name_cd = as.numeric(site_name),
         z_yield_big_cum_log = as.numeric(scale(yield_big_cum_log, center = TRUE, scale = TRUE)),
         z_yield_big_mean_log = as.numeric(scale(yield_big_mean_log, center = TRUE, scale = TRUE)))

# sanity checks: fasstr yield vs. by-hand yield should fall on the 1:1 line
# plot(yield_little_cum ~ xxx_little, dat_wb2, main = "cumulative yield derived using fasstr ~ by-hand")
# abline(a = 0, b = 1, col = "red")
# plot(yyy_little ~ xxx_little, dat_wb2, main = "cumulative yield derived from cumulative flow ~ summed daily yield")
# abline(a = 0, b = 1, col = "red")
# plot(yield_little_mean_log ~ yield_little_cum_log, dat_wb2, main = "mean log yield ~ cumulative log yield")
# abline(a = 0, b = 1, col = "red")

# write to file
write_csv(dat_wb2, "C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/EcoDrought-Analysis/Event Delineation/EcoDrought_Data_EventNonEvent_WestBrookonly.csv")

View relationship between Big G and little g, color by site, facet by event/non-event.

Code
# Little g vs. Big G cumulative (log) yield, colored by site and faceted
# by event vs. baseflow period; dashed line is 1:1.
plot_dat <- dat_wb2 %>%
  mutate(isevent = dplyr::recode(isevent, "1" = "Event", "2" = "Baseflow"))
ggplot(plot_dat,
       aes(x = yield_big_cum_log, y = yield_little_cum_log,
           group = site_name, color = site_name)) +
  geom_point() +
  geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
  geom_smooth(method = "lm", se = FALSE) +
  facet_wrap(~isevent)

Effect of (log) cumulative yield at Big G on (log) cumulative yield at little g during baseflow and event periods.

What does this look like if we use mean yield per event/non-event period?

Code
# Same comparison as above, but using mean (log) daily yield per
# event/non-event period instead of cumulative yield.
plot_dat <- dat_wb2 %>%
  mutate(isevent = dplyr::recode(isevent, "1" = "Event", "2" = "Baseflow"))
ggplot(plot_dat,
       aes(x = yield_big_mean_log, y = yield_little_mean_log,
           group = site_name, color = site_name)) +
  geom_point() +
  geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
  geom_smooth(method = "lm", se = FALSE) +
  facet_wrap(~isevent)

Effect of mean (log) yield at Big G on mean (log) yield at little g during baseflow and event periods.

What does this look like if we use different quantiles? Generally the relationships appear to be the same, just shifted up along the axes to reflect slightly higher flows. Although, note that because we do this on daily data, we are missing a certain amount of within-day variability.

Code
# The three quantile panels differed only in which columns they mapped,
# so the repeated ggplot chain is factored into one tidy-eval helper.
# Axis limits and legend suppression match the original panels exactly.
quantile_panel <- function(df, xvar, yvar) {
  # One panel: little g quantile log yield vs. Big G quantile log yield,
  # per-site lm fits, dashed 1:1 reference line.
  ggplot(df, aes(x = {{ xvar }}, y = {{ yvar }},
                 group = site_name, color = site_name)) +
    geom_point() +
    geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
    geom_smooth(method = "lm", se = FALSE) +
    xlim(-4, 2) + ylim(-5, 2.75) +
    theme(legend.position = "none")
}
p1 <- quantile_panel(dat_wb2, yield_big_q25_log, yield_little_q25_log)
p2 <- quantile_panel(dat_wb2, yield_big_q50_log, yield_little_q50_log)
p3 <- quantile_panel(dat_wb2, yield_big_q75_log, yield_little_q75_log)
ggarrange(p1, p2, p3, nrow = 1)

Effect of mean (log) yield at Big G on mean (log) yield at little g during baseflow and event periods.

View relationship between Big G and little g, color by event/non-event, facet by site. For most sites (except maybe Obear Brook), G-g relationships are identical between events and non-events.

Code
# Cumulative (log) yield relationship, now colored by event/baseflow and
# faceted by site, to ask whether the G-g relationship differs between
# event and non-event periods within a site.
plot_dat <- dat_wb2 %>%
  mutate(isevent = dplyr::recode(isevent, "1" = "Event", "2" = "Baseflow"))
ggplot(plot_dat,
       aes(x = yield_big_cum_log, y = yield_little_cum_log,
           group = isevent, color = isevent)) +
  geom_point() +
  geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
  geom_smooth(method = "lm") +
  facet_wrap(~site_name)

Effect of (log) cumulative yield at Big G on (log) cumulative yield at little g during baseflow and event periods.
Code
# As above, using mean (log) yield per period rather than cumulative.
plot_dat <- dat_wb2 %>%
  mutate(isevent = dplyr::recode(isevent, "1" = "Event", "2" = "Baseflow"))
ggplot(plot_dat,
       aes(x = yield_big_mean_log, y = yield_little_mean_log,
           group = isevent, color = isevent)) +
  geom_point() +
  geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
  geom_smooth(method = "lm") +
  facet_wrap(~site_name)

Effect of mean (log) yield at Big G on mean (log) yield at little g during baseflow and event periods.

View relationship between Big G and little g, color by site, all together

Code
# All periods pooled: little g vs. Big G cumulative (log) yield by site.
# The isevent recode present in the original was dead code here -- the
# plot maps site_name only -- so it has been dropped; output is unchanged.
dat_wb2 %>% 
  ggplot(aes(x = yield_big_cum_log, y = yield_little_cum_log,
             group = site_name, color = site_name)) + 
  geom_point() +
  geom_abline(intercept = 0, slope = 1, linetype = "dashed") + 
  geom_smooth(method = "lm", se = TRUE)

Effect of (log) cumulative yield at Big G on (log) cumulative yield at little g.
Code
# All periods pooled: little g vs. Big G mean (log) yield by site.
# The isevent recode present in the original was dead code here -- the
# plot maps site_name only -- so it has been dropped; output is unchanged.
dat_wb2 %>% 
  ggplot(aes(x = yield_big_mean_log, y = yield_little_mean_log,
             group = site_name, color = site_name)) + 
  geom_point() +
  geom_abline(intercept = 0, slope = 1, linetype = "dashed") + 
  geom_smooth(method = "lm", se = TRUE)

Effect of mean (log) yield at Big G on mean (log) yield at little g.

View relationship between Big G and among-site/event-specific standard deviation in little g.

Code
# Among-site spread (SD) of little g cumulative log yield within each
# agnostic event, plotted against the single Big G value for that event.
sd_dat <- dat_wb2 %>%
  select(agneventid, yield_big_cum_log, yield_little_cum_log) %>%
  group_by(agneventid) %>%
  summarize(unq_big = unique(yield_big_cum_log),
            sd_little = sd(yield_little_cum_log))
ggplot(sd_dat, aes(x = unq_big, y = sd_little)) +
  geom_point() +
  geom_smooth() +
  ylim(0, 1.5)

Derived from log cumulative yield
Code
# Same spread-vs-Big-G view, derived from mean log yield per period.
sd_dat <- dat_wb2 %>%
  select(agneventid, yield_big_mean_log, yield_little_mean_log) %>%
  group_by(agneventid) %>%
  summarize(unq_big = unique(yield_big_mean_log),
            sd_little = sd(yield_little_mean_log))
ggplot(sd_dat, aes(x = unq_big, y = sd_little)) +
  geom_point() +
  geom_smooth() +
  ylim(0, 1.5)

Derived from mean log yield